#####################################################################
# Collection of functiosn to assist with calibration
#####################################################################

##### Data manipulation helpers

# This one is generally useful: function to select an arbitrary quantile index from an object
which.quantile <- function (x, probs, na.rm = FALSE){
  if (! na.rm & any (is.na (x))) # nolint
  return (rep (NA_integer_, length (probs)))

  o <- order (x)
  n <- sum (! is.na (x))
  o <- o [seq_len (n)]

  nppm <- n * probs - 0.5
  j <- floor(nppm)
  h <- ifelse((nppm == j) & ((j%%2L) == 0L), 0, 1)
  j <- j + h

  j [j == 0] <- 1
  o[j]
}

make_objecttype_series <- function(target_series){
	sat_series <- target_series %>%	filter(Sector=="commercial") %>%  select(contains("total_sats")) %>% unlist() %>% as.numeric()
	agg_sat_series <- target_series %>%	filter(Sector=="commercial") %>%  select(contains("agg_sats")) %>% unlist() %>% as.numeric()
	deb_series <- target_series %>%	filter(Sector=="commercial") %>%  select(contains("total_debris")) %>% unlist() %>% as.numeric()
	RB_series <- target_series %>% filter(Sector=="commercial") %>% select("RB") %>% unlist() %>% as.numeric()

	launch_series <- target_series %>% group_by(year) %>% mutate(total_launches = sum(quantity_launched)) %>% ungroup() %>% filter(Sector=="commercial") %>% select(total_launches) %>% unlist() %>% as.numeric()
	years <- target_series %>% filter(Sector=="commercial") %>% select(contains("year")) %>% unlist() %>% as.numeric()
	dfrm <- data.frame(year=years, launches = launch_series, sats=sat_series, debs=deb_series, agg_sats=agg_sat_series)

	return(dfrm)
}

# Average object mass
calc_object_masses <- function(target_series, object_chars) {
	RB_share <- target_series$RB/target_series$total_debris
	IP_share <- target_series$IP/target_series$total_debris
	COF_share <- target_series$COF/target_series$total_debris
	MRO_share <- target_series$MRO/target_series$total_debris

	RB_mass <- object_chars$mass[5]
	IP_mass <- mean(object_chars$mass[c(1:4)])
	COF_mass <- 1.309
	MRO_mass <- (object_chars$mass[5] + IP_mass + COF_mass)/3

	average_deb_mass <- mean(RB_share)*RB_mass + mean(IP_share)*IP_mass + mean(COF_share)*COF_mass + mean(MRO_share)*MRO_mass
	sd_deb_mass <- sqrt(var(RB_share)*RB_mass + var(IP_share)*IP_mass + var(COF_share)*COF_mass + var(MRO_share)*MRO_mass)

	civil_share <- target_series$Civil_Active_Payload/target_series$total_sats
	commercial_share <- target_series$Commercial_Active_Payload/target_series$total_sats
	defense_share <- target_series$Defense_Active_Payload/target_series$total_sats
	other_share <- target_series$Other_Active_Payload/target_series$total_sats

	average_sat_mass <- mean(civil_share)*object_chars$mass[1] + mean(commercial_share)*object_chars$mass[2] + mean(defense_share)*object_chars$mass[3] + mean(other_share)*object_chars$mass[4]

	return(data.frame(average_sat_mass=average_sat_mass, average_deb_mass=average_deb_mass, sd_deb_mass=sd_deb_mass))
}



# Debris decay rate
calc_phys_delta <- function(target_series, residence_times){
	target_residence <- residence_times %>% filter(hcnt <= max(shell_of_interest) & hcnt >= min(shell_of_interest))

	intact_share <- (target_series$RB + target_series$IP + target_series$MRO)/target_series$total_debris
	fragment_share <- target_series$COF/target_series$total_debris
	mean(intact_share) + mean(fragment_share) # check that the means add up to 1
	average_residence_time <- mean(intact_share)*target_residence$intact + mean(fragment_share)*target_residence$fragment 

	phys_delta <- 1/average_residence_time # 1/residence time is probability that object stays in orbit that period, phys_delta is the complement
	return(phys_delta)
}

# Collision rate for empirical things
L_e <- function(S,D,params) {
	probs <- calc_intrprob_coefs(params, params, state=data.frame(S=S, D=D))
	output <- probs$p_sat_sat + probs$p_sat_deb - probs$p_sat_sat*probs$p_sat_deb
	return(output)
}

# Derivatives of collision rate for empirical things
L_S_e <- function(S,D,params) {
	aSS <- params$alpha_SS
	aSD <- params$alpha_SD
	(1 - params$avoidance_fraction_ss)*aSS*exp(- aSS*S - aSD*D)
}

L_D_e <- function(S,D,params) {
	aSS <- params$alpha_SS
	aSD <- params$alpha_SD	
	(1 - params$avoidance_fraction_sd)*aSD*exp(- aSS*S - aSD*D)
}

# Debris growth function for empirical things
G_e <- function(S,D,params) {
	alpha_SS <- params$alpha_SS
	alpha_SD <- params$alpha_SD
	alpha_DD <- params$alpha_DD
	beta_SS <- params$beta_SS
	beta_SD <- params$beta_SD
	beta_DD <- params$beta_DD
	avoidance_fraction_ss <- params$avoidance_fraction_ss
	avoidance_fraction_sd <- params$avoidance_fraction_sd

	beta_SS*(1 - avoidance_fraction_ss)*(1-exp(-alpha_SS*S))*S + beta_SD*(1 - avoidance_fraction_sd)*(1-exp(-alpha_SD*D))*S + beta_DD*(1-exp(-alpha_DD*D))*D
}

# average new fragments function S derivative
G_S_e <- function(S,D,params) {
	aSS <- params$alpha_SS
	aSD <- params$alpha_SD
	bSS <- params$beta_SS
	bSD <- params$beta_SD
	avoidance_fraction_ss <- params$avoidance_fraction_ss
	avoidance_fraction_sd <- params$avoidance_fraction_sd

	bSS*(1 - avoidance_fraction_ss)*(1-exp(-aSS*S)) + bSS*aSS*S*(1 - avoidance_fraction_ss)*exp(-aSS*S) + bSD*(1 - avoidance_fraction_sd)*(1-exp(-aSD*D))
}

# average new fragments function D derivative
G_D_e <- function(S,D,params) {
	aSD <- params$alpha_SD
	aDD <- params$alpha_DD
	bSD <- params$beta_SD
	bDD <- params$beta_DD
	avoidance_fraction_ss <- params$avoidance_fraction_ss
	avoidance_fraction_sd <- params$avoidance_fraction_sd

	aSD*bSD*S*(1 - avoidance_fraction_sd)*exp(-aSD*D) + bDD*(1-exp(-aDD*D)) + aDD*bDD*exp(-aDD*D)*D
}

# Satellite law of motion 
S_e_ <- function(X,S,D,params) {
	S*(1 - L_e(S,D,params)) + X
}

# Debris law of motion
D_e_ <- function(X,S,D,params) {
	stock <- D*(1-params$delta) + G_e(S,D,params) + params$m*X
	stock[which(stock=="NaN")] <- D
	stock[which(stock=="NA")] <- D
	stock
}

# analytical expression for launch rate assuming negative exponential collision rate
eqm_launch_rate <- function(S,D,params) {
	excess_return <- params$excess_return
	delta <- params$delta
	aSS <- params$alpha_SS
	aSD <- params$alpha_SD
	aDD <- params$alpha_DD
	bSD <- params$beta_SD
	bDD <- params$beta_DD
	m <- params$m
	avoidance_fraction_ss <- params$avoidance_fraction_ss
	avoidance_fraction_sd <- params$avoidance_fraction_sd

	num <- log(1-excess_return) + aSS*(1-L_e(S,D,params))*S + aSD*(D*(1-delta)+G_e(S,D,params))
	den <- aSS + aSD*m
	
	launches <- -num/den
	launches <- pmax(launches,0)
	return(launches)
}

# Fragment autocatalysis rate function for empirical things
FAR <- function(S,D,params) {
	result <- G_D_e(S,D,params) - params$delta
	return(result)
}

# Steady state condition, solved by nleqslv. This calculates the steady states of the laws of motion and equilibrium launch function, assuming current economic parameters hold indefinitely.
ss_condition_e <- function(init,params) {
	S <- init[1]
	D <- init[2]
	X <- eqm_launch_rate(S,D,params)
	S_imbalance <- S_e_(X,S,D,params) - S
	D_imbalance <- D_e_(X,S,D,params) - D
	return(c(S_imbalance,D_imbalance))
}

# Open-access steady state finder with empirical functions. Returns an (S,D) pair.
oass_finder_e <- function(init,params) {
	solution <- nleqslv(init, ss_condition_e, params=params)$x
	return(solution)
}

# "Myopic" Kessler Syndrome condition function for empirical things. Calculates the maximum sustainable debris stock given continued open-access behavior at the current economic parameters (assuming current econ parameters stay constant indefinitely). Myopia ensures boundedness of the Kessler time, given indefinite deterministic growth. Returns TRUE if input debris stock exceeds maximum open-access steady-state debris stock.
Kessler_condition <- function(D,params) {
	# result <- FAR(S,D,params) - (L_D_e(S,D,params)/L_S_e(S,D,params))*(G_S_e(S,D,params) + params$m*L_e(S,D,params)) -- OLD APPROACH

	# Calculate steady states under sweep of initial conditions given input parameters
	init_block <- seq(from=0,to=1e5,length.out=5)
	init_mat <- expand.grid(S=init_block,D=init_block)
	steadies <- matrix(NA, nrow=nrow(init_mat), ncol=2)
	names(init_mat) <- NULL
	for(i in 1:nrow(init_mat)) {
		steadies[i,] <- oass_finder_e(init=as.numeric(init_mat[i,]), params=params)
	}

	steadies <- round(steadies,1)
	steadies <- unique(steadies)
	steadies

	result <- (as.numeric(max(steadies[,2]))<D)

	return(result)
}

# Satellite law of motion using empirical things
S_e_ <- function(X,S,D,params) {
	S*(1 - L_e(S,D,params))*params$mu + X
}

# Debris law of motion using empirical things
D_e_ <- function(X,S,D,params) {
	stock <- D*(1-params$delta) + G_e(S,D,params) + params$m*X
	stock[which(stock=="NaN")] <- D
	stock[which(stock=="NA")] <- D
	stock
}

##### Physics things
velocity_calc <- function(hcnt) { sqrt(3.986004415e+5/(hcnt+6378)) } # units of km/s
volume_calc <- function(hcnt){ (4/3)*pi*((hcnt+25)^3 - (hcnt-25)^3) } # units of km^3
pi_vel_vol_calc <- function(velocity, volume) { pi*(velocity/volume) } # units of 1/(s km^2)
collrate_coef <- function( hcnt, area ) {  (velocity_calc(hcnt)*area)/volume_calc(hcnt) }

#
calc_alphas <- function(object_chars, mean_altitude){ # This is based on the approximate formula for spatial density in Letizia's thesis, specifically equation (B.25) on page 266.
	sat_xsect_area <- object_chars[-5,] %>% select(x_sect_avg) %>% summarize(mean = mean(x_sect_avg)) %>% as.numeric()
	deb_xsect_area <- object_chars[5,] %>% select(x_sect_avg) %>% summarize(mean = (x_sect_avg + 0.1)/ 2) %>% as.numeric()
	areas <- data.frame(sat_xsect_area = sat_xsect_area, deb_xsect_area = deb_xsect_area)

	alpha_SS <- collrate_coef(mean_altitude, sat_xsect_area)
	alpha_SD <- collrate_coef(mean_altitude, sat_xsect_area)
	alpha_DD <- collrate_coef(mean_altitude, deb_xsect_area)

	alphas <- data.frame(alpha_SS = alpha_SS, alpha_SD = alpha_SD, alpha_DD = alpha_DD)

	return(alphas)
}

# Function to calculate the collision rate series with avoidance rates included. The idea is that this should return a series showing how many collisions are expected to occur between each object species pairing, given avoidance rates and intrinsic collision probabilities
calc_intrprob_coefs <- function(alphas, avoidance_rates, state) {
	avoidance_fraction_ss <- avoidance_rates$avoidance_fraction_ss
	avoidance_fraction_sd <- avoidance_rates$avoidance_fraction_sd
	alpha_SS <- alphas$alpha_SS
	alpha_SD <- alphas$alpha_SD
	alpha_DD <- alphas$alpha_DD
	sat_series <- state$S
	deb_series <- state$D

	p_sat_sat <- (1 - avoidance_fraction_ss)*(1 - exp(-alpha_SS*sat_series))
	p_sat_deb <- (1 - avoidance_fraction_sd)*(1 - exp(-alpha_SD*deb_series))
	p_deb_deb <- 1 - exp(-alpha_DD*deb_series)

	return(data.frame(p_sat_sat = p_sat_sat, p_sat_deb = p_sat_deb, p_deb_deb = p_deb_deb))
}

calc_nfrags <- function(M_e) {	0.1*(M_e^0.75)*(0.1^(-1.71)) } # Based on equation (3.29) from Letizia thesis.

# Function to generate all the physical coefficients. It will need to accept a vector containing: alphas, avoidance rates, dfrm, average_sat_mass, average_deb_mass. It will need to return only one vector, phys_coefs, that has all we need.
generate_phys_coefs <- function(parm_list, adjustment="ridge"){

	alphas <- parm_list$alphas
	avoidance_rates <- parm_list$avoidance_rates

	dfrm <- parm_list$dfrm
	betas <- parm_list$betas
		beta_SS <- betas$beta_SS
		beta_SD <- betas$beta_SD
		beta_DD <- betas$beta_DD

	# This should return: p_sat_sat, p_sat_deb, p_deb_deb. There should be as many rows as there are rows in dfrm, since this is returning the probability of collisions (probability units) between object species pairings each year
	intrprob_coefs <- calc_intrprob_coefs(alphas, avoidance_rates, state = data.frame(S = dfrm$sats, D = dfrm$debs))
	
	# Pull these out and store them in separate objects
	p_sat_sat <- intrprob_coefs$p_sat_sat
	p_sat_deb <- intrprob_coefs$p_sat_deb
	p_deb_deb <- intrprob_coefs$p_deb_deb
	# Multiply by species population sizes to go from probability units to object units
	p_ss__S <- dfrm$sats*p_sat_sat
	p_sd__D <- dfrm$debs*p_sat_deb
	p_dd__D <- dfrm$debs*p_deb_deb

	### Calculate probability an active satellite gets destroyed
	L_t <- p_sat_sat + p_sat_deb - p_sat_sat*p_sat_deb # construct L(S_t,D_t) from the probability definition

	# Calculate expected number of fragments (series) from the collisions. Divide by avoidance rates to undo that here; the net effect of (historical) avoidance is going to be baked into the ridge coefficients. That is, avoidance enters into the firms' decision-making explicitly through the collision probability function. It enters into the law of motion for debris implicitly through the ridge adjustment coefficients. The ridge coefficients ought to also account for some degree of mass loss (into fragments too small to cause damage) upon breakup.
	# Practically the effect of this is to bring the adjustment coefficients all <1. Without this, the coefficients on SS and SD coefficients end up being much >>1 (roughly 80 and 4), which doesn't make sense if paths are coordinated to reduce collisions and there is loss of mass upon collision.
	B_p_ss__S <- beta_SS*p_ss__S/(1-avoidance_rates$avoidance_fraction_ss)
	B_p_sd__D <- beta_SD*p_sd__D/(1-avoidance_rates$avoidance_fraction_sd)
	B_p_dd__D <- beta_DD*p_dd__D

	### Calculate adjustment coefficients to reflect the fact that sats&debs are not moving randomly
	D_next <- c(dfrm$debs[-1],NA) # construct D_{t+1}
	D_resid <- pmax(D_next - (1-phys_delta)*c(dfrm$debs),0)
	# D_resid <- pmax(D_next - (1 - phys_delta)*c(dfrm$debs) - B_p_ss__S - B_p_sd__D - B_p_dd__D,0)
	# D_resid <- pmax(D_next - phys_delta*c(dfrm$debs) - m_hat*dfrm$launches,0) # subtract off what we know/can estimate from the physical model: the launch debris coupling (average number of launch debris per launch) and the debris decay rate (calculated from fragment residence time). seems to work ok

	# Set up physical object dataframe
	phys_dfrm <- data.frame(dfrm, D_next = D_resid, B_p_ss__S, B_p_sd__D, B_p_dd__D, L_t)
	phys_mat <- as.matrix(phys_dfrm %>% select(c("B_p_ss__S","B_p_sd__D","B_p_dd__D","launches")))[-nrow(dfrm),] # drop the last row with NA in 2020

	# Ridge regression: can estimate all the adjustment factors despite collinearity, and regularization helps control prediction variance due to small sample size (4 parameters, 14 observations). This is important & useful since the goal is forward simulation using the parameters, not statistical inference on the parameters.
	m2 <- glmnet(
		x=phys_mat,y=D_resid[-nrow(dfrm)], alpha=0, 
		lower.limits=c(0, 0, 0, 0), upper.limits=c(Inf, Inf, Inf, Inf), 
		lambda=cv.glmnet(x=phys_mat,y=D_resid[-nrow(dfrm)],alpha=0, intercept=FALSE)$lambda.min, 
		intercept=FALSE)
	coef(m2) # using high avoidance rates in calculating B_p_xx_X produces unrealistically large fragmentation coefficients here... Setting the avoidance rates to zero looks better, but then we don't have avoidance in there properly

	m2xvars <- as.matrix(cbind(subset(phys_dfrm,select=colnames(phys_mat))))
	m2coefs <- coef(m2)[-1]
	names(m2coefs) <- colnames(phys_mat)

	write.csv(m2coefs,file="calibrated_debris_lom_coefs.csv")

	m2coefs <- as.data.frame(m2coefs)

	### Avoid applying ridge adjustment if so instructed
	if(adjustment=="none"){
		m2coefs["launches",] <- 1
		m2coefs["B_p_ss__S",] <- 1
		m2coefs["B_p_sd__D",] <- 1
		m2coefs["B_p_dd__D",] <- 1
	}

	### Estimate launch debris coupling. The launch debris coupling is dD_{t+1}/dX_t = m. Pull m_hat from the ridge on debris couplings, i.e. estimate new fragment formation couplings jointly. This fits well, but seems to produce a lot of launches when there's no debris -- intuitive given how weak the launch coupling is. Remember to adjust phys_mat by INCLUDING launches, since we're estimating m there. This is probably plausible... people don't want to shit where they eat.
	m_hat <- m2coefs["launches",]

	#####
	# Evaluate model fits
	#####

	### Satellite model

	### Debris model
	# debris_xvars <- (phys_dfrm %>% select(-c("year","L_t","sats", "D_next")))[-length(dfrm$year)] %>% as.matrix() # final [-x] is to remove the final year with NA for D_nextto
	debris_xvars <- (phys_dfrm %>% select(c("launches","debs","B_p_ss__S", "B_p_sd__D", "B_p_dd__D")))[-length(dfrm$year)] %>% as.matrix() # final [-x] is to remove the final year with NA for D_nextto
	debris_coefs <- as.matrix(c(m_hat, (1 - phys_delta), m2coefs["B_p_ss__S",], m2coefs["B_p_sd__D",], m2coefs["B_p_dd__D",]))
	debris_coefs_unadjusted <- as.matrix(c(1, (1 - phys_delta), 1, 1, 1))

	# print(m2coefs)
	mean(D_next - debris_xvars %*% debris_coefs_unadjusted, na.rm=TRUE) # The unadjusted model overpredicts

	# fitplot(debris_xvars, debris_coefs_unadjusted, dfrm$year, D_next, "Fit", "Debris") #The unadjusted model overpredicts
	# fitplot(debris_xvars, debris_coefs, dfrm$year, D_next, "Fit", "Debris") 
	# Interesting how it looks about a year lagged behind D_{t+1}. It fits really well, but consistently under, the path of D_t itself. 

	phys_coefs <- data.frame(alpha_SS = alphas$alpha_SS, alpha_SD = alphas$alpha_SD, alpha_DD = alphas$alpha_DD, avoidance_fraction_ss = avoidance_fraction_ss, avoidance_fraction_sd = avoidance_fraction_sd, delta = phys_delta, m2_bss = m2coefs["B_p_ss__S",], m2_bdd = m2coefs["B_p_dd__D",], m2_bsd = m2coefs["B_p_sd__D",], bss = beta_SS, bsd = beta_SD, bdd = beta_DD, beta_SS = m2coefs["B_p_ss__S",]*beta_SS, beta_SD = m2coefs["B_p_sd__D",]*beta_SD, beta_DD = m2coefs["B_p_dd__D",]*beta_DD, m = m_hat, L_t = phys_dfrm$L_t[nrow(phys_dfrm)], mu = mean_sat_residence)
	
	output <- list(phys_coefs = phys_coefs, L_t = L_t)

	return(output)
}

##### Economics things
returns_fn <- function( p, a, period, S, elasticity) { p*exp(a*period)*S^(elasticity) } # measured as a rate of return, but can also receive p directly

# Convenience function to fit occupancy elasticity regressions. Useful in the bootstrap.
oem_fitter <- function(df) {
	lm(value ~ time + log(sats), data=df)
}

# Convenience function to fit cost growth regressions. Useful in the bootstrap.
ftft_fitter <- function(df) {
	lm(value ~ time, data=df)
}

# Function to calculate coefficients to adjust aggregate satellite returns and costs series to match a target shell. pi_t and F_t should, as far as possible, only represent LEO values. So pi_t ends up being communications and observation but not GNT, and F_t ends up having SSA.
generate_econ_coefs <- function(target_series, p, a, elasticity, payoff_type="flat"){

	target_series <- target_series %>% filter(Sector=="commercial")

	econ_series <- target_series %>% 
	mutate(pi_t = Satellite.Communications + Earth.Observation,
		p = p,
		a = a,
		normalized_period = year - target_series$year[1],
		calculated_pi_t = returns_fn(p, a, normalized_period, total_sats, elasticity),
		F_t = Ground.Stations.and.Equipment + Insurance.Premiums + Satellite.Launch.Industry..Commercial. + Satellite.Manufacturing..Commercial. + Space.Situational.Awareness,
		L_t = L_t
		) %>% select(year, p, a, normalized_period, calculated_pi_t, pi_t, F_t, L_t) 

	econ_series <- econ_series[-nrow(econ_series),]
	econ_series <- econ_series %>% mutate(r_st = calculated_pi_t/F_t, Ft_Ft = c(NA,F_t[-nrow(econ_series)]/F_t[-1])) %>% mutate(time = year - year[1])

	econrisk_model <- lm(L_t ~ r_st + Ft_Ft, data=econ_series)
	summary(econrisk_model)

	econ_coefs <- coef(econrisk_model)

	return(list(econ_series=econ_series, econ_coefs=econ_coefs, econ_model=econrisk_model))
}

# Function to calculate the open access equilibrium launch rate from empirical coefficients and variables
eqm_cond_empirical <- function(launches, econ_coefs, econ_vars, phys_coefs, state, first_revision_variant=TRUE, elasticity_draw=0) {
	S <- state$S
	D <- state$D
	mean_sat_lifetime <- phys_coefs$mean_sat_residence
	alpha_SS <- phys_coefs$alpha_SS
	alpha_SD <- phys_coefs$alpha_SD
	alpha_DD <- phys_coefs$alpha_DD
	avoidance_fraction_ss <- phys_coefs$avoidance_fraction_ss
	avoidance_fraction_sd <- phys_coefs$avoidance_fraction_sd
	delta <- phys_coefs$delta
	beta_SS <- phys_coefs$beta_SS
	beta_SD <- phys_coefs$beta_SD
	beta_DD <- phys_coefs$beta_DD
	m <- phys_coefs$m
	current_Lt <- phys_coefs$L_t

	# calculate current physical coefficients
	p_ss__S <- S*(1 - avoidance_fraction_ss)*(1 - exp(-alpha_SS*S))
	p_sd__D <- D*(1 - avoidance_fraction_sd)*(1 - exp(-alpha_SD*D))
	p_dd__D <- D*(1 - exp(-alpha_DD*D))

	# propagate state
	S_next <- S*(1 - current_Lt)*mean_sat_residence + launches # assume all end-of-life satellites are immediately, successfully, and safely deorbited
	D_next <- D*(1 - delta) + beta_SS*p_ss__S + beta_SD*p_sd__D + beta_DD*p_dd__D + m*launches

	# calculate next-period collision risk
	p_sat_sat <- (1 - avoidance_fraction_ss)*(1 - exp(-alpha_SS*S_next))
	p_sat_deb <- (1 - avoidance_fraction_sd)*(1 - exp(-alpha_SD*D_next))
	L_next <- p_sat_sat + p_sat_deb - p_sat_sat*p_sat_deb

	# update econ vars to account for 
	if(first_revision_variant==TRUE) {
		econ_vars$r_st <- econ_vars$r_st*S_next^elasticity_draw
	}

	output <- 1e+10*(sum(econ_coefs*econ_vars) - L_next)^2
	return(output)
}

## Equilibrium condition without squaring at the end -- useful for checking corners
eqm_cond_empirical_raw <- function(launches, econ_coefs, econ_vars, phys_coefs, state, first_revision_variant=TRUE, elasticity_draw=0) {
	S <- state$S
	D <- state$D
	mean_sat_lifetime <- phys_coefs$mean_sat_residence
	alpha_SS <- phys_coefs$alpha_SS
	alpha_SD <- phys_coefs$alpha_SD
	alpha_DD <- phys_coefs$alpha_DD
	avoidance_fraction_ss <- phys_coefs$avoidance_fraction_ss
	avoidance_fraction_sd <- phys_coefs$avoidance_fraction_sd
	delta <- phys_coefs$delta
	beta_SS <- phys_coefs$beta_SS
	beta_SD <- phys_coefs$beta_SD
	beta_DD <- phys_coefs$beta_DD
	m <- phys_coefs$m
	current_Lt <- phys_coefs$L_t

	# calculate current physical coefficients
	p_ss__S <- S*(1 - avoidance_fraction_ss)*(1 - exp(-alpha_SS*S))
	p_sd__D <- D*(1 - avoidance_fraction_sd)*(1 - exp(-alpha_SD*D))
	p_dd__D <- D*(1 - exp(-alpha_DD*D))

	# propagate state
	S_next <- S*(1 - current_Lt)*mean_sat_residence + launches # assume all end-of-life satellites are immediately, successfully, and safely deorbited
	D_next <- D*(1 - delta) + beta_SS*p_ss__S + beta_SD*p_sd__D + beta_DD*p_dd__D + m*launches

	# calculate next-period collision risk
	p_sat_sat <- (1 - avoidance_fraction_ss)*(1 - exp(-alpha_SS*S_next))
	p_sat_deb <- (1 - avoidance_fraction_sd)*(1 - exp(-alpha_SD*D_next))
	L_next <- p_sat_sat + p_sat_deb - p_sat_sat*p_sat_deb

	# update econ vars to account for 
	if(first_revision_variant==TRUE) {
		econ_vars$r_st <- econ_vars$r_st*S_next^elasticity_draw
	}

	output <- (sum(econ_coefs*econ_vars) - L_next)
	return(output)
}

##### Bootstrap things


# Function to generate bootstrap residuals using only a model, data, and B
generate_bootstrap_dvs <- function(model, data, B=100, modeltype="growth") {
	
	resids <- as.numeric(resid(model))
	resid_se <- sqrt( (1/(length(resids) - length(coef(model))) ) * resids%*%resids)

	#### generate bootstrap resamples of residuals
	resample_clock_start <- proc.time()[3]
	set.seed(seednum)
	resample_order <- replicate(B,sample(c(1:length(resids)),length(resids), replace = TRUE)) # this creates a matrix with length(resids)-many rows, and B-many columns. Each column is a particular resampled order of the residuals for the bootstrap. ######### NEED TO MAKE THIS FASTER FOR BIG VALUES OF B
	resample_clock_end <- proc.time()[3]
	message("Total time for resampling: ", round(resample_clock_end - resample_clock_start, 3)," seconds")
	resids_resamples <- matrix(resids[resample_order], ncol=B, byrow=FALSE) # now generate a matrix of the actual resampled residuals, according to the orders generated above

	#### Generate model predictions and resampled dependent variables
	model_output <- as.numeric(predict(model, data=data)) + (resid_se^2)/2 # generate just the predictions from the main model -- with log-log "smearing" bias correction

	bootstrap_dv <- as.data.frame(model_output + resids_resamples) # generate all the bootstrapped outcomes, turn that matrix into a dataframe.

	colnames(bootstrap_dv) <- 1:B # give them names
	bootstrap_dv <- cbind(time=1:nrow(bootstrap_dv), bootstrap_dv)
	bs_dfrm <- bootstrap_dv %>% pivot_longer(!time, names_to="sim", values_to="value") %>% arrange(sim)
	head(bs_dfrm)
	dim(bs_dfrm)
	
	if("log(sats)"%in%names(model$coefficients)) {
		bs_xvars <- data %>% filter(complete.cases(.)) %>% select(sats)
		bs_dfrm <- cbind(bs_dfrm, bs_xvars)
	}

	bs_dfrm_nested <- bs_dfrm %>% group_by(sim) %>% nest()

	return(bs_dfrm_nested)

}

# Function to run the residual bootstrap using only a nested dataframe and a function for the model to be fit
generate_bootstrap_samples <- function(nested_df, fn) {
	#### The bootstrap
	bootstrap_clock_start <- proc.time()[3]
	bs_dfrm_nested <- nested_df %>% mutate(model = map(data, fn), estimates=map(model, coef), varnames=map(estimates, names))
	bootstrap_clock_end <- proc.time()[3]
	message("Total time for bootstrapping: ", round(bootstrap_clock_end - bootstrap_clock_start, 3)," seconds")

	return(bs_dfrm_nested)
}

# Function to unnest and write out estimates. Takes a nested dataframe and "type" \in {oem, ftft}.
unnest_and_write_out <- function(nested_dfrm,type) {
	unnest_clock_start <- proc.time()[3]
	nested_unnested_estimates <- unnest(data=nested_dfrm, cols=c(estimates,varnames))
	unnest_clock_end <- proc.time()[3]
	message("Total time for unnesting: ", round(unnest_clock_end - unnest_clock_start, 3)," seconds")

	output <- nested_unnested_estimates %>% select(sim, estimates, varnames) %>% ungroup() %>% pivot_wider(names_from = varnames, values_from = estimates )

	if(type=="oem") { 
		output <- output %>% rename("intercept" = '(Intercept)',"log_sats" = 'log(sats)') %>% mutate(sim=as.numeric(sim)) %>% arrange(sim) 
		out_name <- paste0("bs_oem_output.csv")
	}
	if(type=="ftft") { 
		output <- output %>% rename("intercept" = '(Intercept)') %>% mutate(sim=as.numeric(sim)) %>% arrange(sim) 
		out_name <- paste0("bs_ftft_output.csv")
	}
	 
	write.csv(output, file=out_name)

	return(output)
}

# Function to generate a sample from the prediction distribution (i.e., generate the prediction intervals, then calculate the implied SD from the t-stat, then sample from the distribution of the prediction). Function accepts a model, datapoint for the new prediction, and number of bootstrap samples B; and outputs a mean and sd.
generate_prediction_sample <- function(model, data, B){

	mean_prediction <- predict(model, newdata=data, interval="prediction")
	T_a <- pt(q = (1-0.95/2), df=(B-2)) # the t-statistic used in the prediction interval. basically Inf degrees of freedom since we have like 50k or more obs.
	s_n <- abs(mean_prediction[1]/(sqrt(1 + 1/B)*T_a)) # estimated standard deviation for the prediction interval using formula for unknown mean and variance from https://en.wikipedia.org/wiki/Prediction_interval#Unknown_mean,_unknown_variance
	return(c(mean_prediction[1], s_n))

}

# Formulas for linear-IHS model, good for Kessler time regressions. Accepts a model and the associated data as input, outputs a vector of elasticities with coefficient names.
linIHS_elasticity <- function(model, data) {
	betahats <- coef(model)

	dv_name <- gsub("\\(", "", summary(model)$call[2])
	dv_name <- gsub(" [~].+", "", summary(model)$call[2])

	data <- data %>% filter(complete.cases(.))

	if("(Intercept)"%in%names(betahats)) {
		betahats <- betahats[which(names(betahats)=="(Intercept)")] # drop the intercept element
	}

	names(betahats) <- gsub("log\\(", "", names(betahats) )
	names(betahats) <- gsub("\\)", "", names(betahats) )

	y_mean <- mean(data[,dv_name] %>% unlist() %>% as.numeric(),na.rm=TRUE)

	idx <- colnames(data)%in%names(betahats)
	x_means <- colMeans(test_data[,idx],na.rm=TRUE)

	# run the linear-IHS formulas
	elasticities <- vector(mode="numeric", length=length(betahats))
	for(element in seq_along(betahats)) {
		selected_xmean <- means[names(means)%in%names(betahats[element])]
		selected_betahat <- betahats[element]
		elasticities[element] <- (selected_betahat/y_mean)*(selected_xmean/((1 + selected_xmean)^2))
	}

	names(elasticities) <- names(betahats)

	# output a vector of elasticities
	return(elasticities)
}

# Function to fit a distribution regression using a logit. Accepts as argument the dataframe and a vector of the 
distreg_fitter <- function(df) {
	xvar_names <- grep("^X.*", names(df), value=TRUE)
	formy <- reformulate(termlabels=xvar_names, response = "values")
	glm(formy, data=df, family="binomial")
}

# Function to extract data means for prediction
distreg_datameans <- function(df) {
	xvars <- df %>% select(contains("X."))
	xmeans <- as.numeric(colMeans(xvars))
	output <- data.frame(1,t(xmeans))
	colnames(output) <- c("(Intercept)", names(xvars))
	output
}

# Function to turn two numbers into data means for prediction. Accepts a 1x2 vector, outputs a 1x3 vector.
format_datameans <- function(vec) {
	out_vec <- c(1,vec)
	out_vec <- as.data.frame(out_vec)
	colnames(out_vec) <- c("(Intercept)", "X.mean_total_growth", "X.mean_returns_growth", "X.mean_cost_growth", "X.elasticity")
	return(out_vec)
}

# Function to run distribution regression. Accepts as input Y vector, dataframe of X variables, size of grid.
dist_reg <- function(input, npts=100, B=150, counterfactual_means=NULL, ncores=min(detectCores()-1,40) ) {

	Y <- input[,"Kessler_time"] %>% unlist() %>% as.numeric()
	X <- as.data.frame(input[,c("mean_total_growth","mean_returns_growth","mean_cost_growth","elasticity")])
	Y_id <- 1:length(Y)
	dfrm <- data.frame(Y_id, Y=Y, X=X)

	# Generating the grid
	nodes <- as.numeric(quantile(Y, probs = seq(0,1,length.out=npts)))#[-1] # convert it to a numeric to strip the labels, and drop the min observation since we can't do anything with it
	for(i in seq_along(nodes)) {
	  dfrm <- dfrm %>% mutate( "Y_{i}0" := ifelse(Y<=nodes[i], 1, 0) )
	}

	dfrm_wide <- dfrm %>% pivot_longer(!c("Y_id", "Y", contains("X.")), names_to="indicators", values_to="values")

	dfrm_nest <- dfrm_wide %>% group_by(indicators) %>% nest(data = c(Y_id, Y, contains("X."), values))

	if(is.null(counterfactual_means)){
		dr_clock_start <- proc.time()[3]
		dfrm_nest <- dfrm_nest %>% mutate(
				model = map(data, distreg_fitter), 
				data_means=map(.x = data, .f=distreg_datameans))
		dr_clock_end <- proc.time()[3]
	}
	if(is.null(counterfactual_means)==FALSE){
		dr_clock_start <- proc.time()[3]
		dfrm_nest <- dfrm_nest %>% mutate(
				model = map(data, distreg_fitter), 
				data_means = list(counterfactual_means))
		dr_clock_end <- proc.time()[3]
		dfrm_nest
		dfrm_nest$data_means[[1]]
	}
	
	message("Total time for distribution regressions: ", round(dr_clock_end - dr_clock_start, 3)," seconds. Estimating CDF...")
	dfrm_nest

	prediction_bs_vec <- rep(NA,length.out=B)

	cdf_hat <- data.frame(predictions=rep(NA,length.out=nrow(dfrm_nest)), prediction_se=rep(NA,length.out=nrow(dfrm_nest)), clock=rep(NA,length.out=nrow(dfrm_nest)))

	cdfhat_time_start <- proc.time()[3]
	cl <- makeCluster( ncores , type="FORK")
	registerDoParallel(cl)
	setDefaultCluster(cl=cl)

	cdf_hat <- foreach(i=c(1:nrow(dfrm_nest)), .inorder=TRUE, .combine=rbind) %dopar% {
		node_time_start <- proc.time()[3]
		prediction_i <- predict(dfrm_nest$model[[i]], newdata = dfrm_nest$data_means[[i]], type="response")

		# For loop to do the B bootstrap samples.
		for(j in 1:B) {
			bs_dfrm_nest <- dfrm %>% sample_frac(size=0.25, replace=TRUE) %>% pivot_longer(!c("Y_id", "Y", contains("X.")), names_to="indicators", values_to="values") %>% group_by(indicators) %>% nest(data = c(Y_id, Y, contains("X."), values))

			bs_dfrm_nest <- bs_dfrm_nest %>% mutate(
				model = map(data, distreg_fitter))

			prediction_bs_vec[j] <- predict(bs_dfrm_nest$model[[i]], newdata = dfrm_nest$data_means[[i]], type="response")
		}
		prediction_se_i <- sd(prediction_bs_vec,na.rm=TRUE)

		prediction_ci_critval_i = max( abs(prediction_bs_vec - prediction_i)/prediction_se_i )

		node_time_end <- proc.time()[3]

		prediction_output <- c(predictions=prediction_i, prediction_se=prediction_se_i, prediction_ci_critval=prediction_ci_critval_i, clock=round(dr_clock_end - dr_clock_start, 3))

		prediction_output

	}

	on.exit(stopCluster(cl))

	cdfhat_time_end <- proc.time()[3]
	cdf_hat <- as.data.frame(cdf_hat)
	message("Total time to estimate CDF with standard errors: ", round(cdfhat_time_end - cdfhat_time_start, 3)," seconds. Time for nodes: ", round(sum(cdf_hat$clock.elapsed),3) )

	dfrm_nest <- cbind(dfrm_nest, predictions=cdf_hat$predictions, prediction_se=cdf_hat$prediction_se, prediction_ci_critval=cdf_hat$prediction_ci_critval, Y_quantiles=nodes)

	return(dfrm_nest)
}

# Function to monotonize estimates using the Rearrangement package's tool (https://cran.r-project.org/web/packages/Rearrangement/Rearrangement.pdf). Mostly a wrapper for the rearrangement() function. Accepts a dataframe or tibble as input, returns a dataframe/tibble as output.
monotonize_estimates <- function(dfrm){
	X <- as.data.frame(rbind(0,dfrm[,c("Y_quantiles")]))
	Y <- dfrm[,"predictions"] %>% unlist() %>% as.numeric()
	Y <- c(0,Y)
	rr_predictions <- rearrangement(X,Y,avg=TRUE)
	rr_quantiles <- names(rr_predictions)
	names(rr_predictions) <- NULL
	ci_critval_95 <- as.numeric(quantile(dfrm$prediction_ci_critval, probs=0.95, na.rm=TRUE))

	Y_se_lower <- (dfrm[,"predictions"] - ci_critval_95*dfrm[,"prediction_se"]) %>% unlist() %>% as.numeric()
	Y_se_lower <- c(0,Y_se_lower)
	rr_se_lower <- rearrangement(X,Y_se_lower,avg=TRUE)
	names(rr_se_lower) <- NULL
	rr_se_lower <- pmax(rr_se_lower,0)

	Y_se_upper <- (dfrm[,"predictions"] + ci_critval_95*dfrm[,"prediction_se"]) %>% unlist() %>% as.numeric()
	Y_se_upper <- c(0,Y_se_upper)
	rr_se_upper <- rearrangement(X,Y_se_upper,avg=TRUE)
	names(rr_se_upper) <- NULL
	rr_se_upper <- pmin(rr_se_upper,1)

	rr_predictions_dfrm <- data.frame(rr_quantiles = rr_quantiles, rr_predictions = rr_predictions, rr_se_lower = rr_se_lower, rr_se_upper = rr_se_upper)
	dfrm_0 <- rbind( data.frame(indicators="Y_00", Y_quantiles=as.numeric(0), predictions=as.numeric(0), prediction_se=as.numeric(0), prediction_ci_critval=as.numeric(0)) , as.data.frame(dfrm))
	dfrm_rr <- cbind(dfrm_0, rr_predictions_dfrm)

	write.csv(dfrm_rr, "dfrm_rr.csv")
	dfrm_rr <- read_csv("dfrm_rr.csv")

	dfrm_rr <- dfrm_rr %>% mutate(rr_quantiles_perc = str_replace(rr_quantiles, "%", ""),
												rr_quantiles_perc = as.numeric(rr_quantiles_perc),
												rr_quantiles = max(Y_quantiles)*rr_quantiles_perc/100 )

	return(dfrm_rr)
}

# Function to generate counterfactual Kesslertime estimates and plots. Takes as input a dataframe of sim_results_summary, an unformatted vector of counterfactual means for the scenario, and a scenario label for outputs. Outputs and writes out a dataframe of estimates. Mainly a wrapper for format_datameans and dist_reg, so it can take some time.
run_ktime_cf <- function(input, cf_means, label, ncores=10){

	cf_means <- format_datameans(cf_means)
	cf_means
	sim_results_distreg_cf <- dist_reg(input=input, npts=100, ncores=ncores, counterfactual_means=cf_means)
	output <- sim_results_distreg_cf %>% select(Y_quantiles, predictions, prediction_se, prediction_ci_critval)
	write.csv(output, file=paste0("sim_dr_cf_",label,".csv"))

	output <- monotonize_estimates(output)

	ktime_plot <- gen_ktime_plots(output)

	png(width=1000,height=950,filename=paste0("../../images/kesslertime_cdf_plot_cf_",label,".png"))
	print(ktime_plot$ktime_all + ktime_plot$ktime_50 + plot_layout(ncol = 1, heights = c(1, 1), guides = "collect"))
	dev.off()

	return(output)
}


# Function to generate kesslertime plots. Takes a dataframe of rearranged functional and interval estimates as an input, outputs a list of plots.
gen_ktime_plots <- function(input) {
	ktime_all <- ggplot(data=(input %>% filter(Y_quantiles<=n_years)), aes(x=(rr_quantiles+2020) )) + 
		geom_line(aes(y=rr_predictions)) +
		geom_line(aes(y=predictions), linetype="dashed") +
		geom_ribbon(aes(ymin=rr_se_lower, ymax=rr_se_upper), alpha = 0.25) +
		labs(y="Probability", x="Year", title="Probability of Kessler Syndrome by year t") + 
	  theme_bw() +
		theme(text = element_text(size=24))

	ktime_150 <- ggplot(data=(input %>% filter(Y_quantiles<=150)) , aes(x=(rr_quantiles+2020) )) + 
		geom_line(aes(y=rr_predictions)) +
		geom_line(aes(y=predictions), linetype="dashed") +
		geom_ribbon(aes(ymin=rr_se_lower, ymax=rr_se_upper), alpha = 0.25) + 
		labs(y="Probability", x="Year", title="Probability of Kessler Syndrome by year t") +
	  theme_bw() + 
		theme(text = element_text(size=24))

	ktime_50 <- ggplot(data=(input %>% filter(Y_quantiles<=50)) , aes(x=(rr_quantiles+2020) )) + 
		geom_line(aes(y=rr_predictions)) +
		geom_line(aes(y=predictions), linetype="dashed") +
		geom_ribbon(aes(ymin=rr_se_lower, ymax=rr_se_upper), alpha = 0.25) + 
		labs(y="Probability", x="Year", title="Probability of Kessler Syndrome by year t") +
	  theme_bw() + 
		theme(text = element_text(size=24))

	ktime_50_prodn <- ggplot(data=(input %>% filter(Y_quantiles<=50)) , aes(x=(rr_quantiles+2020) )) + 
		geom_line(aes(y=rr_predictions)) +
		geom_ribbon(aes(ymin=rr_se_lower, ymax=rr_se_upper), alpha = 0.25) + 
		labs(y="Probability", x="Year", title="Probability of Kessler Syndrome by year t") +
	  theme_bw() + 
		theme(text = element_text(size=24),
			plot.margin = unit(c(1,1,1,1), "cm"))

		return(list(ktime_all=ktime_all, ktime_150=ktime_150, ktime_50=ktime_50, ktime_50_prodn=ktime_50_prodn))
}

# Function to assemble timeseries plots of key quantities, along with histogram of kessler times
gen_timeseries_plots <- function(data, cutoff, histbinwidth=1) {

	kesslertime_hist <- ggplot(data=sim_results_summary_small %>% filter(Kessler_time<=50), aes(x=Kessler_time, y=after_stat(count/sum(count)))) +
		geom_histogram(binwidth=histbinwidth) +
		theme_bw() + labs(x = "Kessler time", y = "Proportion") + theme(text = element_text(size=24)) + ggtitle("Simulated empirical distribution of Kessler times")

	png(width=800,height=300,filename=paste0("../../images/kesslertime_hist_plot.png"))
	kesslertime_hist
	dev.off()

	sim_results_big_plotbase <- ggplot(data = (sim_results_big %>% filter(lead(Kessler_ind <= 0)) ), aes(group=sim, color=elasticity, x=(year+2020))) + theme_bw() + labs(x = "Year", y="", color = "Occupancy elasticity\nof satellite revenue") + theme(text = element_text(size=24)) + scale_color_viridis()

	sim_results_plot_sats <- sim_results_big_plotbase + geom_line(aes(y=sats), alpha = 0.1) + ylab("Satellites") + ggtitle("Satellites") + ylab("")
	sim_results_plot_debs <- sim_results_big_plotbase + geom_line(aes(y=debs), alpha = 0.1) + ylab("Debris") + ggtitle("Debris") + ylab("")
	sim_results_plot_launches <- sim_results_big_plotbase + geom_line(aes(y=launches), alpha = 0.1) + ylab("Launches") + ggtitle("Launches") + ylab("")
	sim_results_plot_risk <- sim_results_big_plotbase + geom_line(aes(y=risk), alpha = 0.1) + ylab("Risk") + ggtitle("Collision risk") + ylab("")

layout <- '
AABB
AABB
CCDD
CCDD
EEEE
'

	sim_results_plot_full <- sim_results_plot_launches + sim_results_plot_sats + sim_results_plot_debs + sim_results_plot_risk + kesslertime_hist +
	  plot_layout(design=layout, guides = "collect") + plot_annotation(tag_levels='a')

sim_results_plot_horizon_list <- list()

	for(i in seq_along(cutoff)){

		sim_results_big_plotbase <- ggplot(data = (sim_results_big %>% filter(lead(Kessler_ind <= 0), year <= cutoff[i]) ), aes(group=sim, color=elasticity, x=(year+2020))) + theme_bw() + labs(x = "Year", y="", color = "Occupancy elasticity\nof satellite revenue") + theme(text = element_text(size=24)) + scale_color_viridis()

		sim_results_plot_sats <- sim_results_big_plotbase + geom_line(aes(y=sats), alpha = 0.1) + ylab("Satellites") + ggtitle("Satellites") + ylab("")
		sim_results_plot_debs <- sim_results_big_plotbase + geom_line(aes(y=debs), alpha = 0.1) + ylab("Debris") + ggtitle("Debris") + ylab("")
		sim_results_plot_launches <- sim_results_big_plotbase + geom_line(aes(y=launches), alpha = 0.1) + ylab("Launches") + ggtitle("Launches") + ylab("")
		sim_results_plot_risk <- sim_results_big_plotbase + geom_line(aes(y=risk), alpha = 0.1) + ylab("Risk") + ggtitle("Collision risk") + ylab("")

		sim_results_plot_horizon_list[[i]] <- sim_results_plot_launches + sim_results_plot_sats + sim_results_plot_debs + sim_results_plot_risk + kesslertime_hist +
	  plot_layout(design=layout, guides = "collect") + plot_annotation(tag_levels='a')

	}


	return(list(sim_results_plot_full = sim_results_plot_full, sim_results_plot_horizon=sim_results_plot_horizon_list))

}
